********************************************************************************
**** SET LOCAL PATHS

	local filepath  "e:/NYU/projects/duration dependence/stata/austrian data/dta files"
	local filepath3 "e:/NYU/projects/duration dependence/stata/austrian data/RP for IG"

********************************************************************************
**** REGISTERED UNEMPLOYMENT SPELLS
* Builds one row per registered unemployment spell (byear 1986 or later),
* numbered within person in order of start date, and saves it for later
* matching against non-employment spells.

	use "`filepath'/unemployed_days_VA.dta", clear

	* missing byear compares as larger than any number, so those rows stay
	keep if byear >= 1986

	sort pid bdate

	* diagnostic only: gap in days to the same person's next spell;
	* a negative value means two registered spells overlap
	gen overlap = bdate[_n+1] - edate if pid == pid[_n+1]
	count if overlap < 0

	keep pid bdate edate birthyear sex
	gen byte unemployed = 1

	* running spell counter within person (data are still sorted by pid bdate)
	by pid: gen index = _n
	* the reported maximum is the largest number of spells held by one person
	summarize index

	save "`filepath3'/RP_unemployed_days_VA_pid_after1986_index.dta", replace
*/


********************************************************************************	
* LAST DAY OF EMPLOYMENT AND REGISTERED UNEMPLOYMENT
********************************************************************************	

	local filepath "e:/NYU/projects/duration dependence/stata/austrian data/dta files"
	use "`filepath'/epi_all.dta", clear

	* discard lfstatus 3, 4 and 5, and lfstatus-1 rows whose ins_type is not "US"
	drop if inlist(lfstatus, 3, 4, 5)
	drop if lfstatus == 1 & ins_type != "US"

	* Latest end date per person, separately for lfstatus 1 and lfstatus 2.
	* Each date is taken as a per-person maximum over its own rows, so the
	* result does not depend on which row happens to sort first within a
	* person: someone with lfstatus-2 rows but no lfstatus-1 row still gets
	* a non-missing last_day_unemp.
	gen double emp_end   = edate if lfstatus == 1
	gen double unemp_end = edate if lfstatus == 2

	* one row per pid; a group with no qualifying row yields missing
	collapse (max) last_day_emp = emp_end last_day_unemp = unemp_end, by(pid)
	format %td last_day_emp last_day_unemp

	compress
	save "`filepath3'/RP_epi_last_day_emp_unemp.dta", replace
*/

********************************************************************************	
* FIRST DAY OF RETIREMENT
********************************************************************************	

	use "`filepath'/epi_all.dta", clear
	keep if lfstatus == 4

	*** ins_type codes among lfstatus-4 rows
	* PI, RU -- regular pension
	* PA, RE -- disability pension
	* PF     -- early retirement (grouped with the regular pension below)
	* PT     -- partial pension, partial work

	* 1 = regular/early, 2 = disability, 3 = partial; any other code stays missing
	gen byte pension_group = 1 if inlist(ins_type, "PI", "RU", "PF")
	replace pension_group = 2 if inlist(ins_type, "PA", "RE")
	replace pension_group = 3 if ins_type == "PT"

	* earliest start date per person within each pension group;
	* missing when the person has no row in that group
	sort pid
	by pid: egen begin_ret_reg     = min(cond(pension_group == 1, bdate, .))
	by pid: egen begin_ret_dis     = min(cond(pension_group == 2, bdate, .))
	by pid: egen begin_ret_partial = min(cond(pension_group == 3, bdate, .))

	* the three dates are constant within person, so any single row will do
	by pid: keep if _n == 1

	keep pid begin_ret_reg begin_ret_dis begin_ret_partial
	compress
	format %td begin_ret_reg begin_ret_dis begin_ret_partial

	save "`filepath3'/RP_epi_first_retirement.dta", replace

*/

********************************************************************************	
* CONSTRUCT NON-EMPLOYMENT SPELLS BETWEEN CONSECUTIVE SPELLS
********************************************************************************	

* Builds, for every spell, the gap that follows it (bdate_NE .. edate_NE)
* and saves the result with a within-person spell index.
use "`filepath'/epi_134_nooverlap_woagerestrict.dta", clear
drop fid

* ATTACH LAST EMPLOYMENT / UNEMPLOYMENT DAY (using-only persons are discarded)
merge m:1 pid using "`filepath3'/RP_epi_last_day_emp_unemp.dta", keep(master match) nogenerate

* ATTACH FIRST RETIREMENT DATES (using-only persons are discarded)
merge m:1 pid using "`filepath3'/RP_epi_first_retirement.dta", keep(master match) nogenerate

* LAST DAY IN THE DATASET
* row-wise maximum over the merged date columns from last_day_emp through
* begin_ret_dis; begin_ret_partial lies after that range and is not included
egen last_day = rowmax(last_day_emp-begin_ret_dis)
format %td last_day

* the merged helper columns are no longer needed once last_day exists
drop last_day_emp-begin_ret_partial

* drop lfstatus-4 rows, lfstatus-1 rows whose ins_type is not "US",
* and one-day LG spells (these are birth of a child)
drop if lfstatus == 4 | (lfstatus == 1 & ins_type != "US") | (ins_type == "LG" & bdate == edate)

* CONSTRUCT SPELLS
sort pid bdate edate

* the gap starts the day after this spell ends ...
gen bdate_NE = edate + 1
* ... and ends the day before the same person's next spell begins, or the day
* before last_day when this is the person's final row
gen edate_NE = cond(pid == pid[_n+1], bdate[_n+1] - 1, last_day - 1)
format %td bdate_NE edate_NE
gen nonemp_dur = edate_NE - bdate_NE

* status of the following spell; code 9 marks the end of the dataset
gen next_status = lfstatus[_n+1] if pid == pid[_n+1]
replace next_status = 9 if next_status == .

* a negative duration means there is no gap: blank out both dates
foreach v of varlist bdate_NE edate_NE {
	replace `v' = . if nonemp_dur < 0
}

compress
rename bdate bdate_spell
rename edate edate_spell

sort pid bdate_spell
by pid: gen pid_spell_index = _n
save "`filepath3'/RP_nonemployment_temp.dta", replace
*/

********************************************************************************

* Flags every constructed gap (bdate_NE .. edate_NE) that overlaps at least
* one registered unemployment spell of the same person. Registered spells are
* processed one rank at a time (index == k) so that each merge is m:1 on pid.

set more off

* Number of passes = largest per-person spell rank actually present in the
* data, instead of a hard-coded bound that would silently skip higher ranks.
use "`filepath3'/RP_unemployed_days_VA_pid_after1986_index.dta", clear
quietly summarize index
local max_index = r(max)
if missing(`max_index') {
	display as error "no registered unemployment spells found: nothing to match"
	exit 2000
}

* Loop-invariant part: gaps with both dates defined, prepared once.
use "`filepath3'/RP_nonemployment_temp.dta", clear
drop if edate_NE == .
drop if bdate_NE == .
tempfile ne_spells
save "`ne_spells'"

forval k = 1/`max_index' {

	* registered spells of rank k: at most one row per pid
	use "`filepath3'/RP_unemployed_days_VA_pid_after1986_index.dta", clear
	keep if index == `k'
	save "`filepath3'/RP_unemployed_days_VA_pid_index_`k'.dta", replace

	use "`ne_spells'", clear
	merge m:1 pid using "`filepath3'/RP_unemployed_days_VA_pid_index_`k'.dta", keep(match) nogenerate

	* keep gaps that overlap the registered spell: either the gap covers the
	* registered start date, or the gap starts inside the registered spell
	keep if (bdate_NE <= bdate & edate_NE >= bdate) | (bdate_NE >= bdate & bdate_NE <= edate)

	keep pid bdate_NE edate_NE bdate edate pid_spell_index
	save "`filepath3'/RP_nonemployment_overlap_index_`k'.dta", replace
}

* stack the per-rank results
use "`filepath3'/RP_nonemployment_overlap_index_1.dta", clear
forval k = 2/`max_index' {
	append using "`filepath3'/RP_nonemployment_overlap_index_`k'.dta"
}

* a gap may overlap several registered spells: keep it once
keep pid pid_spell_index bdate_NE edate_NE
sort pid pid_spell_index
by pid pid_spell_index: keep if _n == 1

* the *_check copies let the next step verify the match after merging back
rename bdate_NE bdate_NE_check
rename edate_NE edate_NE_check
gen byte registered_unemp = 1
save "`filepath3'/RP_nonemployment_overlap_index_all.dta", replace
*/

********************************************************************************
* PREPARE THE FILE
********************************************************************************

* Attach the registered-unemployment flag to every spell row, then restrict
* the labor market history by age.
use "`filepath3'/RP_nonemployment_temp.dta", clear
merge 1:1 pid pid_spell_index using "`filepath3'/RP_nonemployment_overlap_index_all.dta", nogenerate

* rows without a match carry no *_check dates: their gap is not registered
replace registered_unemp = 0 if bdate_NE_check == .

* consistency checks: both counts compare the gap dates with their merged copies
count if bdate_NE != bdate_NE_check & registered_unemp == 1
count if edate_NE != edate_NE_check & registered_unemp == 1

drop bdate_NE_check edate_NE_check

* AGE RESTRICTION
* age is measured in calendar years: spell year minus birth year
gen byear = year(bdate_spell)
gen eyear = year(edate_spell)
gen bage  = byear - birthyear
gen eage  = eyear - birthyear

* drop spells that end before age 25 or begin after age 60
keep if eage >= 25
keep if bage <= 60

* truncate labor market history at 31 December of the year the person turns 60
gen date_truncate = mdy(12, 31, birthyear + 60)
format %td date_truncate
replace edate_spell = date_truncate if edate_spell > date_truncate

* MERGE EMPLOYMENT SPELLS IF THEY ARE LESS THAN 2 MONTHS APART
* Two consecutive lfstatus-1 spells of the same person are fused into one when
* the gap between them is below 62 days and the earlier spell's following gap
* is not flagged as registered unemployment (registered_unemp == 0).
	sort pid bdate_spell
	* gap in days between this spell's start and the previous spell's end
	gen gap  = bdate_spell - edate_spell[_n-1] if pid==pid[_n-1]
	* only gaps between two lfstatus-1 spells are considered
	replace gap = . if lfstatus~=1 
	replace gap = . if lfstatus[_n-1]~=1 
	
	* a missing gap compares as larger than 62, so such rows are never flagged
	gen byte tomerge = 1 if gap < 62 & registered_unemp[_n-1] == 0
	*gen byte tomerge = 1 if gap < 183 & registered_unemp[_n-1] == 0
	gen todrop = 0
	* the later spell inherits the earlier start date; replace runs row by row,
	* so a chain of flagged spells propagates the first start date forward
	replace bdate_spell = bdate_spell[_n-1] if tomerge == 1
	* the earlier spell of each fused pair is then removed
	replace todrop = 1 if tomerge[_n+1] == 1	
	drop if todrop == 1
	drop tomerge todrop
	
* ADD NON-EMPLOYMENT SPELLS
* Every row whose following gap has nonemp_dur >= 0 is duplicated; the copy
* (dup == 1) is turned into a spell of its own covering bdate_NE .. edate_NE.
	* NOTE(review): a missing nonemp_dur also satisfies ">= 0" in Stata, so such
	* rows are duplicated too -- confirm that this is intended
	gen byte todup = cond(nonemp_dur >= 0, 2, 1)
	expand todup, gen(dup)
	sort pid pid_spell_index dup

	* the copy takes over the dates of the gap
	foreach side in bdate edate {
		replace `side'_spell = `side'_NE if dup == 1
	}

	* gap flagged as registered unemployment -> lfstatus 2 (non-employment spell)
	* gap not flagged                        -> lfstatus 7 (out-of-sample spell)
	replace lfstatus = cond(registered_unemp == 1, 2, 7) if dup == 1 & inlist(registered_unemp, 0, 1)

* TRUNCATE NON-EMPLOYMENT SPELLS AT THE AGE OF 60
	* spells running past date_truncate are cut there and marked as ending the
	* observed history (next_status 9)
	gen byte truncated_age = 1 if edate_spell > date_truncate & edate_spell != .
	foreach fix in "edate_spell = date_truncate" "next_status = 9" {
		replace `fix' if truncated_age == 1
	}
	
* DEFINE SEGMENTS
* A segment is a run of consecutive lfstatus 1 / 2 rows of one person. This
* section numbers the segments, dates them, and drops rows outside any segment.
	sort pid bdate_spell
	* same_segment is not referenced again in this file before it is dropped
	gen byte same_segment = 0 
	replace same_segment = 1 if pid==pid[_n-1] & ( lfstatus== 1 | lfstatus==2) 

* FIND BEGINNING OF SEGMENTS
	* segment_interrupt marks the first row of each person and every row whose
	* preceding row (same person) has lfstatus 3 or 7
	gen segment_interrupt = 0
	replace segment_interrupt = 1 if pid~=pid[_n-1]
	replace segment_interrupt = 1 if (pid==pid[_n-1]) & (lfstatus[_n-1] == 3 | lfstatus[_n-1] == 7)

	* a segment begins at an lfstatus-2 row that directly follows an lfstatus-1
	* row which is itself flagged by segment_interrupt
	gen segment_beg = 0
	replace segment_beg = 1 if pid==pid[_n-1] & lfstatus==2 & lfstatus[_n-1] == 1 & segment_interrupt[_n-1]==1

	* persons without any segment start are removed entirely
	by pid: egen segment_beg_tot = total(segment_beg)
	drop if segment_beg_tot == 0
	drop segment_beg_tot

	sort pid bdate_spell
	* the segment id is the within-person row number of the segment's first row
	by pid: gen segment_number = _n
	replace segment_number = . if segment_beg ~=1
	* carry the id forward over following lfstatus 1 / 2 rows of the same person;
	* this relies on replace working through the observations in order, and the
	* carry stops at the first row with any other lfstatus
	replace segment_number = segment_number[_n-1] if segment_number==. & pid==pid[_n-1] & (lfstatus == 1 |lfstatus==2)

* SEQUENTIALLY CREATE SEGMENTS
	sort pid segment_number
	* segment span: earliest start and latest end among its rows
	by pid segment_number: egen segment_bdate = min(bdate_spell)
	by pid segment_number: egen segment_edate = max(edate_spell)
	format %td segment_bdate segment_edate
	* rows without a segment id were grouped together above; blank their dates
	replace segment_bdate = . if segment_number ==.
	replace segment_edate = . if segment_number ==.
	
* keep only well-defined segments
	drop if segment_number ==.	

* KEEP EACH PERSON'S LONGEST SEGMENT
* Ti_days is the length of a segment in days (segment end minus segment start).
	gen Ti_days = segment_edate - segment_bdate

* keep the longest Ti
	sort pid bdate_spell
	by pid: egen max_Ti = max(Ti_days)
	keep if Ti_days == max_Ti

* if there are multiple segments of the same size, keep the first one
	sort pid segment_number bdate_spell
	* index is 1 on the first row of every remaining segment and 0 elsewhere,
	* so its per-person total is the number of segments tied for longest
	by pid segment_number: gen index = (_n == 1)
	by pid: egen index_tot = total(index)
	* diagnostic: number of rows belonging to persons with tied segments
	* (reported in the log; no interactive data browser is opened, so the
	* do-file also runs unattended)
	count if index_tot > 1

	* the smallest segment_number is the earliest of the tied segments
	by pid: egen segment_min = min(segment_number)
	keep if segment_number == segment_min

* remove helper variables before saving
	drop index index_tot segment_min max_Ti
	drop byear eyear bage eage todup dup
	drop same_segment segment_interrupt segment_number segment_beg
	drop date_truncate registered_unemp pid_spell_index
	drop bdate_NE edate_NE nonemp_dur ins_type
	compress
	save "`filepath3'/RP_workers_longest_segment.dta", replace
	
*/
	

********************************************************************************
* NON-EMPLOYMENT SPELLS
********************************************************************************

	local filepath  "e:/NYU/projects/duration dependence/stata/austrian data/dta files"
	local filepath3 "e:/NYU/projects/duration dependence/stata/austrian data/RP for IG"

	* Thresholds, defined BEFORE their first use (an undefined local expands
	* to nothing and turns "2*`T'" into a syntax error).
	* T is compared with durations measured in weeks (see bdate_week below);
	* T1 = T + 1 is the value at which durations are capped further down.
	local T = 104
	local T1 = `T'+1

	* input: the longest-segment file written by the previous section
	* NOTE(review): the file name previously read "XXX_workers_longest_segment.dta",
	* which no step in this do-file produces -- confirm "RP_" is the intended input
	use  "`filepath3'/RP_workers_longest_segment.dta", clear

	* collapse consecutive employment spells
	sort pid bdate_spell
	by pid: gen pid_index = _n

	* an lfstatus-1 row directly after another lfstatus-1 row of the same person
	* takes over the earlier start date; the earlier row is then dropped
	gen tomerge = 1 if pid==pid[_n-1] & lfstatus == 1 & lfstatus[_n-1]==1
	gen todrop = 0
	replace bdate_spell = bdate_spell[_n-1] if tomerge == 1
	replace todrop = 1 if tomerge[_n+1] == 1

	drop if todrop == 1
	drop tomerge todrop

	* week numbers counted from 3 January 1972; duration is in weeks
	gen bdate_week = floor((bdate_spell - mdy(1,3,1972))/7)+1
	gen edate_week = floor((edate_spell - mdy(1,3,1972))/7)+1
	gen duration   = edate_week - bdate_week

	* Ti: total duration of all of a person's spells in the segment
	sort pid bdate_spell
	by pid: egen Ti = total(duration)

	* duration of the spell that follows (same person)
	gen duration_next = duration[_n+1] if pid==pid[_n+1]

	* keep only non-employment spells
	keep if lfstatus == 2
	sort pid bdate_spell
	by pid: gen spell_number = _n
	by pid: gen spell_all = _N

	* after the collapse above, the spell following a non-employment spell is
	* the subsequent employment spell
	rename duration_next duration_emp

	* SAVE MORE INFO TO BE USED to construct data with observable characteristics
	* Done on a preserved copy: the observation filter below applies only to the
	* side file, and the full sample is restored afterwards for the group counts
	* and the estimation sample.
	preserve
		gen byte A1_temp  = (spell_number == 1) & (Ti > 2*`T') & (duration <= `T') & (Ti - 2*`T' > duration_emp)
		gen byte B1_temp  = (spell_number == 1) & (Ti > 2*`T') & (duration > `T') & (Ti - 2*`T' > duration_emp) & (spell_all > 1)
		gen byte B3a_temp = (spell_number == 1) & (Ti > 2*`T') & (duration > `T') & (spell_all == 1) & (next_status == 1) & (duration > 2*`T')
		gen byte B4_temp  = (spell_number == 1) & (Ti > 2*`T') & (duration > `T') & (spell_all == 1) & (next_status ~= 1)

		* spread each group flag to all rows of the person
		sort pid spell_number
		by pid: egen A1 = total(A1_temp)
		by pid: egen B1 = total(B1_temp)
		by pid: egen B3a= total(B3a_temp)
		by pid: egen B4 = total(B4_temp)

		* keep persons in any of the four groups, first two spells only
		gen tokeep_obs = A1 + B1 + B3a + B4
		keep if tokeep_obs>=1
		keep if spell_number <=2
		save "`filepath3'/RP_IG_raw_0_`T1'_RP_allinfo.dta", replace
	restore

	sort pid spell_number
		
	* GROUP COUNTS
	* Classifies first non-employment spells (spell_number == 1) by total
	* segment length Ti and by spell duration, relative to `T'. Only the three
	* counts stored in grpB1, grpB3a and grp4 are used again afterwards.
	local first (spell_number == 1)
	local I     `first' & (Ti > 2*`T')
	local IA    `I' & (duration <= `T')
	local IA1   `IA' & (Ti - 2*`T' > duration_emp)
	local IB    `I' & (duration > `T')
	local IB3   `IB' & (spell_all == 1) & (next_status == 1)

	count if `first'

	* group I
	count if `I'

	* group I.A
	count if `IA'

	* group I.A.1, then split by what follows the first spell:
	* a second spell no longer than T, a second spell longer than T,
	* no following row for the same person, or no further spell at all
	count if `IA1'
	count if `IA1' & (pid == pid[_n+1]) & (duration[_n+1] <= `T')
	count if `IA1' & (pid == pid[_n+1]) & (duration[_n+1] > `T')
	count if `IA1' & (pid != pid[_n+1])
	count if `IA1' & (spell_all == 1)

	* group I.A.2
	count if `IA' & (Ti - 2*`T' <= duration_emp) & (spell_all > 1)

	* group I.A.3
	count if `IA' & (spell_all == 1)

	* group I.B
	count if `IB'

	* group I.B.1
	count if `IB' & (Ti - 2*`T' > duration_emp) & (spell_all > 1)
	local grpB1 = r(N)

	* group I.B.2
	count if `IB' & (Ti - 2*`T' <= duration_emp) & (spell_all > 1)

	* group I.B.3, then split at a duration of 2T
	count if `IB3'
	count if `IB3' & (duration > 2*`T')
	local grpB3a = r(N)
	count if `IB3' & (duration <= 2*`T')

	* group I.B.4
	count if `IB' & (spell_all == 1) & (next_status != 1)
	local grp4 = r(N)

	* group II
	count if `first' & (Ti <= 2*`T')
	
		
	
	**** SAMPLE FOR ESTIMATION
	* Persons whose first spell satisfies the group I.A.1 condition; their first
	* two spells are tabulated as cell counts over (nonemp1, nonemp2).
	gen byte tokeep = (spell_number == 1) & (Ti > 2*`T') & (duration <= `T') & (Ti - 2*`T' > duration_emp)

	sort pid spell_number
	by pid: egen tokeep_tot = total(tokeep)
	keep if tokeep_tot >= 1
	keep if spell_number <= 2

	keep pid duration spell_number
	* durations are capped at T1 = T + 1
	replace duration = `T1' if duration >= `T1'

	* put the second spell's duration next to the first spell's
	sort pid spell_number
	gen duration2 = duration[_n+1] if pid == pid[_n+1]
	keep if spell_number == 1

	* one row per (duration, duration2) cell, with the number of persons in it
	* NOTE(review): egen group() leaves grp missing when duration2 is missing
	* (persons with a single spell), so all of those persons end up in one row
	* with an arbitrary duration -- confirm this is what the consumer expects
	egen grp = group(duration duration2)
	sort grp
	by grp: gen index = _n
	by grp: gen people = _N
	keep if index == 1
	keep duration duration2 people

	* add observations
	* Three summary rows at nonemp1 = T1 carry the counts of groups I.B.1,
	* I.B.3 (duration > 2T) and I.B.4 under the marker values 300, 400 and 500
	* in nonemp2. They are appended explicitly, so they are written even when
	* no person falls in the (T, T1) cell.
	local n_cells = _N
	set obs `=`n_cells' + 3'
	local r1 = `n_cells' + 1
	local r2 = `n_cells' + 2
	local r3 = `n_cells' + 3

	replace duration  = `T1' in `r1'/`r3'

	replace duration2 = 300 in `r1'
	replace people    = `grpB1' in `r1'

	replace duration2 = 400 in `r2'
	replace people    = `grpB3a' in `r2'

	replace duration2 = 500 in `r3'
	replace people    = `grp4' in `r3'

	rename duration nonemp1
	rename duration2 nonemp2
	sort nonemp1 nonemp2
	outsheet nonemp1 nonemp2 people using "`filepath3'/IG_raw_0_`T1'_RP.txt", nonames replace
	




